Epimutable genes were selected as MAplot calls with padj<1e-4.

setwd("/Users/ab6415/Bioinformatics/Analysis/EpiMALs_PAPER_ANALYSES/03_Figure1/")

library(MASS)
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
library(msir)
## Package 'msir' version 1.3.2
## Type 'citation("msir")' for citing this R package in publications.
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
theme_set(theme_bw(base_size = 16))

ttg_DEseqnorm<-read.table("../02_Normalised_counts/22G_DEseqnorm_counts_averaged.txt"); ttg_DEseqnorm_gens25_100<-ttg_DEseqnorm[,25:47]
ttg_MAplot_data<-read.table("../03_Figure1/MAplot_filtering_22Gs_gens25100_p1e-2.txt",header=TRUE)

ttg_MAplot_data_p1Eminus4<-ttg_MAplot_data[which(ttg_MAplot_data$padj<1e-4),]
epimutable_genes<-unique(ttg_MAplot_data_p1Eminus4$ID)
length(epimutable_genes)
## [1] 442

Correlation analysis

Correlation analysis on the subset of epimutable genes showed decreasing correlations with distance in generations. This analysis reveals C100, I100, K100 and L100 as outliers - lines showing much lower correlation with the rest of samples. The source of this is difference is unclear, when growing the lines they did not seem to be sick compared to the rest. Including those in the correlation analysis makes the correlations scale with the distance in generations better, but this is only driven by these 4.

When removing these 4 samples, the difference in correlation is significant up the the 50-75 generation comparison, and it stabilises beyond that, consistent with the lack of long-term epigenetic inheritance overall.

#load matrix with the number of generations separating each pair of samples
numgenerations<-read.table("numgenerations_matrix.txt")


#function to plot the correlation coefficients as a function of the distance between samples
cor_analysis<-function(cpm_data,maplot_calls,heatmapfilename,boxplotfilename,dotplotfilename){
  
  filt_cpm_data<-cpm_data[which(rownames(cpm_data) %in% maplot_calls$ID),]
  cor_matrix<-cor(log2(filt_cpm_data+1))
  
  numchanges_matrix<-matrix(nrow=nrow(cor_matrix),ncol=ncol(cor_matrix))
  rownames(numchanges_matrix)<-rownames(cor_matrix); colnames(numchanges_matrix)<-colnames(cor_matrix)
  for (row in rownames(numchanges_matrix)){
    for (col in colnames(numchanges_matrix)){
      numchanges_matrix[row,col]<-nrow(maplot_calls[which(maplot_calls$line1==row & maplot_calls$line2==col),])
    }
  }
  
  tit<-"442 genes, p<1e-4"
  
  numgen_reordered<-numgenerations[rownames(cor_matrix),colnames(cor_matrix)]
  
  heatmap.2(cor_matrix,trace="none",dendrogram = "none",Rowv = FALSE,Colv = FALSE,main = tit)
  
  pdf(heatmapfilename)
  heatmap.2(cor_matrix,trace="none",dendrogram = "none",Rowv = FALSE,Colv = FALSE,main = tit)
  dev.off()
  
  numgen_vector<-numgen_reordered[upper.tri(numgen_reordered)]
  cor_vector<-cor_matrix[upper.tri(cor_matrix)]
  numchanges_vector<-numchanges_matrix[upper.tri(numchanges_matrix)]
  
  numgen_df<-data.frame(ng_vect=numgen_vector,
                        cor_vect=cor_vector,
                        nch_vect=numchanges_vector)
  numgen_df$ng_vect<-as.factor(numgen_df$ng_vect)

  cors_boxplots <- ggplot(numgen_df, aes(x=ng_vect, y=cor_vect,fill=ng_vect))+ ggtitle(tit) + xlab("generations separating lines") + ylab("correlation coefficient of DE 22G set")+geom_boxplot()+theme_classic()+scale_fill_brewer(aesthetics =c("fill","color"),palette = "GnBu")
  ggsave(plot=cors_boxplots,filename = boxplotfilename,dpi="retina")
  print(cors_boxplots)
  
    cors_dotplots <- ggplot(numgen_df,aes(x=ng_vect, y=cor_vect))+geom_dotplot(binaxis='y', stackdir='center',aes(fill=factor(ng_vect),color=factor(ng_vect)))+theme_classic()+ ggtitle(tit) + xlab("generations separating lines") + ylab("correlation coefficient of DE 22G set")+scale_fill_brewer(aesthetics =c("fill","color"),palette = "GnBu")+stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,
                 geom = "crossbar", width = 0.5,color="black")
  print(cors_dotplots)
  ggsave(plot=cors_dotplots,filename = dotplotfilename,dpi="retina")

  
  #numchanges_boxplots <- ggplot(numgen_df, aes(x=numgen_vector, y=numchanges_vector))+ ggtitle(tit) + xlab("generations separating lines") + ylab("number of DE22Gs")+geom_boxplot()
  #print(numchanges_boxplots)

  numgen_df$ng_vect<-as.numeric(as.character(numgen_df$ng_vect))
  
  print(wilcox.test(numgen_df[which(numgen_df$ng_vect==25),"cor_vect"],
              numgen_df[which(numgen_df$ng_vect==50),"cor_vect"]))
  
  print(wilcox.test(numgen_df[which(numgen_df$ng_vect==50),"cor_vect"],
              numgen_df[which(numgen_df$ng_vect==75),"cor_vect"]))
  
  print(wilcox.test(numgen_df[which(numgen_df$ng_vect==75),"cor_vect"],
              numgen_df[which(numgen_df$ng_vect==100),"cor_vect"]))
  
  print(wilcox.test(numgen_df[which(numgen_df$ng_vect==100),"cor_vect"],
              numgen_df[which(numgen_df$ng_vect==125),"cor_vect"]))
  
  print(wilcox.test(numgen_df[which(numgen_df$ng_vect==125),"cor_vect"],
              numgen_df[which(numgen_df$ng_vect==200),"cor_vect"]))
  
}


#including outlier lines C100,I100,K100,L100
cor_analysis(ttg_DEseqnorm_gens25_100,ttg_MAplot_data_p1Eminus4,
             heatmapfilename="correlation_heatmap_withCIKL.pdf",
             boxplotfilename="correlation_boxplots_withCIKL.pdf",
             dotplotfilename = "correlation_dotplots_withCIKL.pdf")
## Saving 7 x 5 in image

## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numgen_df[which(numgen_df$ng_vect == 25), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 50), "cor_vect"]
## W = 465, p-value = 0.005314
## alternative hypothesis: true location shift is not equal to 0
## 
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numgen_df[which(numgen_df$ng_vect == 50), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 75), "cor_vect"]
## W = 521, p-value = 0.0001762
## alternative hypothesis: true location shift is not equal to 0
## 
## 
##  Wilcoxon rank sum test
## 
## data:  numgen_df[which(numgen_df$ng_vect == 75), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 100), "cor_vect"]
## W = 65, p-value = 0.7969
## alternative hypothesis: true location shift is not equal to 0
## 
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numgen_df[which(numgen_df$ng_vect == 100), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 125), "cor_vect"]
## W = 619, p-value = 0.9031
## alternative hypothesis: true location shift is not equal to 0
## 
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numgen_df[which(numgen_df$ng_vect == 125), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 200), "cor_vect"]
## W = 3656, p-value = 0.0293
## alternative hypothesis: true location shift is not equal to 0
#removing outlier lines C100,I100,K100,L100
cor_analysis(ttg_DEseqnorm_gens25_100[,-c(15,20,22,23)],ttg_MAplot_data_p1Eminus4,
             heatmapfilename="correlation_heatmap_withoutCIKL.pdf",
             boxplotfilename="correlation_boxplots_withoutCIKL.pdf",
             dotplotfilename = "correlation_dotplots_withoutCIKL.pdf")
## Saving 7 x 5 in image

## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numgen_df[which(numgen_df$ng_vect == 25), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 50), "cor_vect"]
## W = 465, p-value = 0.005314
## alternative hypothesis: true location shift is not equal to 0
## 
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numgen_df[which(numgen_df$ng_vect == 50), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 75), "cor_vect"]
## W = 301, p-value = 0.0163
## alternative hypothesis: true location shift is not equal to 0
## 
## 
##  Wilcoxon rank sum test
## 
## data:  numgen_df[which(numgen_df$ng_vect == 75), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 100), "cor_vect"]
## W = 28, p-value = 0.7104
## alternative hypothesis: true location shift is not equal to 0
## 
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numgen_df[which(numgen_df$ng_vect == 100), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 125), "cor_vect"]
## W = 264, p-value = 0.7431
## alternative hypothesis: true location shift is not equal to 0
## 
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numgen_df[which(numgen_df$ng_vect == 125), "cor_vect"] and numgen_df[which(numgen_df$ng_vect == 200), "cor_vect"]
## W = 681, p-value = 0.6143
## alternative hypothesis: true location shift is not equal to 0

Estimates of epimutation totals

For the set of epimutable genes, we used unsupervised k-means clustering to define 22G expression levels as low (1) or high (2) for all samples in the dataset. Then using this data, we calculated the total number of changes that we see between samples.

When we include the outlier samples, we see that there is a significant difference between the observed number of changes within the 25 generation samples and the 100th generation samples, in other words, the 100th generation samples seem to be more divergent in their 22G expression states.

ttg_DEseqnorm_epimutable_genes<-ttg_DEseqnorm[which(rownames(ttg_DEseqnorm) %in% epimutable_genes),]

kmeans_row<-function(vector){  
  kmc<-kmeans(x = as.numeric(vector),centers=2)
  high<-kmc$cluster[which.is.max(vector)]
  if (high==2){return(kmc$cluster)
  }else{return(abs(kmc$cluster-3))}
}

library(nnet)

ttg_segmentation_epimutable_genes<-matrix(0,nrow=nrow(ttg_DEseqnorm_epimutable_genes),ncol = 47)
colnames(ttg_segmentation_epimutable_genes)<-colnames(ttg_DEseqnorm_epimutable_genes)
rownames(ttg_segmentation_epimutable_genes)<-rownames(ttg_DEseqnorm_epimutable_genes)

for (gene in rownames(ttg_DEseqnorm_epimutable_genes)){
      ttg_segmentation_epimutable_genes[gene,]<-kmeans_row(ttg_DEseqnorm_epimutable_genes[gene,])
}

heatmap.2(as.matrix(ttg_segmentation_epimutable_genes[,25:47]),col = colorRampPalette(c("#67a9cf","#de2d26")),Colv=FALSE,trace="none",dendrogram = "none")

heatmap.2(cor(as.matrix(ttg_segmentation_epimutable_genes[,25:47])),Colv=FALSE,trace="none",dendrogram = "none")

diffs<-c()
line1<-c()
line2<-c()
distances<-c()

for (l1 in colnames(ttg_segmentation_epimutable_genes[,25:47])){
  for (l2 in colnames(ttg_segmentation_epimutable_genes[,25:47])){
    
  diffs<-c(diffs,sum(abs(ttg_segmentation_epimutable_genes[,l1]-ttg_segmentation_epimutable_genes[,l2])))
  line1<-c(line1,l1); line2<-c(line2,l2)
  
  if (l1=="PMA"){l1<-"P0"}
  if (l2=="PMA"){l2<-"P0"}
        
  lineage_1<-substr(l1,start = 1,stop = 1)
  gen_1<-as.numeric(substr(l1,start=2,stop=nchar(l1)))
  lineage_2<-substr(l2,start = 1,stop = 1)
  gen_2<-as.numeric(substr(l2,start=2,stop=nchar(l2)))
        
  if (lineage_1!=lineage_2){
        distance=abs(gen_1+gen_2)
        }
  else if (lineage_1==lineage_2){
        distance=abs(gen_1-gen_2)
        }
  distances<-c(distances,distance)
  
  if (l1=="P0"){l1<-"PMA"}
  if (l2=="P0"){l2<-"PMA"}

}}

Estimation of epimutation totals

From the k-means segmented data, we estimate epimutation totals in two alternative ways:

  • comparing each sample to the starting line (PMA)
  • comparing samples pairwise to each other within the 25th generation and 100th generation set - this has the extra advantage that controls for potential batch effects resulting from the fact that the PMA sample collection was carried out together with the 25th generation lines; however the estimate is less direct.

In both estimations, we do not see clear differences between the total epimutation numbers in gen25 and gen100 lines - consistent with no long-term inheritance.

Pairwise analysis

In this case, including the outlier lines has a large effect in the totals.

numchanges_df<-data.frame(num_changes=diffs,line1=line1,line2=line2,distance=distances)
ggplot(numchanges_df)+geom_boxplot(aes(y=num_changes,x=factor(distance),fill=factor(distance)))+theme_classic()+scale_fill_brewer(aesthetics =c("fill"),palette = "GnBu")

ggsave("number_of_changes_boxplot_gens25_100_withCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
ggplot(numchanges_df)+geom_boxplot(aes(y=num_changes/nrow(ttg_segmentation_epimutable_genes),x=factor(distance)))

ggsave("proportion_of_changes_boxplot_gens25_100_withCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
numchanges_df_50_200<-numchanges_df[which(numchanges_df$distance==50 | numchanges_df$distance==200),]
ggplot(numchanges_df_50_200)+geom_boxplot(aes(y=num_changes,x=factor(distance),fill=factor(distance)))+ scale_fill_manual(values=c("#66c2a5", "#8da0cb"))+theme_classic()

ggsave("number_of_changes_boxplot_withinbatch_withCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
ggplot(numchanges_df_50_200,aes(x=factor(distance),y=num_changes,fill=factor(distance),color=factor(distance)))+geom_dotplot(binaxis = "y", stackdir = "center")+ stat_summary(fun.y=median, geom="point", shape=19,size=5,col="black")+scale_fill_manual(values = c("#a8ddb5","#43a2ca"),aesthetics = c("fill","color"))
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("number_of_changes_dotplot_withinbatch_withCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
#testing differences in the number of changes
wilcox.test(numchanges_df[which(numchanges_df$distance==50),"num_changes"],
            numchanges_df[which(numchanges_df$distance==200),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df[which(numchanges_df$distance == 50), "num_changes"] and numchanges_df[which(numchanges_df$distance == 200), "num_changes"]
## W = 2366, p-value = 5.954e-15
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==25),"num_changes"],
            numchanges_df[which(numchanges_df$distance==50),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df[which(numchanges_df$distance == 25), "num_changes"] and numchanges_df[which(numchanges_df$distance == 50), "num_changes"]
## W = 744, p-value = 0.004453
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==50),"num_changes"],
            numchanges_df[which(numchanges_df$distance==75),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df[which(numchanges_df$distance == 50), "num_changes"] and numchanges_df[which(numchanges_df$distance == 75), "num_changes"]
## W = 350, p-value = 1.51e-07
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==75),"num_changes"],
            numchanges_df[which(numchanges_df$distance==100),"num_changes"])
## Warning in wilcox.test.default(numchanges_df[which(numchanges_df$distance == :
## cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df[which(numchanges_df$distance == 75), "num_changes"] and numchanges_df[which(numchanges_df$distance == 100), "num_changes"]
## W = 190, p-value = 0.2262
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==100),"num_changes"],
            numchanges_df[which(numchanges_df$distance==125),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df[which(numchanges_df$distance == 100), "num_changes"] and numchanges_df[which(numchanges_df$distance == 125), "num_changes"]
## W = 2706, p-value = 0.3617
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df[which(numchanges_df$distance==125),"num_changes"],
            numchanges_df[which(numchanges_df$distance==200),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df[which(numchanges_df$distance == 125), "num_changes"] and numchanges_df[which(numchanges_df$distance == 200), "num_changes"]
## W = 13114, p-value = 0.2147
## alternative hypothesis: true location shift is not equal to 0

Direct comparison to PMA

We can see the 4 outlier lines on the top part of the 100th gen epimutation totals distribution, but the median is still similar between 25th and 100th generation lines.

ttg_segmentation_epimutable_genes_25_100<-ttg_segmentation_epimutable_genes[,25:47]

ttg_segmentation_epimutable_genes_25_100<-data.frame(ttg_segmentation_epimutable_genes_25_100)

epimuts_maintained<-function(df,line1,line2){
  epimuts_25<-abs(df[,line1]-df[,"PMA"])
  epimuts_100<-abs(df[,line2]-df[,"PMA"])
  print(c(line1,length(which(epimuts_25==1)),line2,length(which(epimuts_100==1)),"maintained",length(which(epimuts_25==1 & epimuts_100==1))))
}

epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"A25","A100")
## [1] "A25"        "66"         "A100"       "116"        "maintained"
## [6] "39"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"B25","B100")
## [1] "B25"        "54"         "B100"       "112"        "maintained"
## [6] "25"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"C25","C100")
## [1] "C25"        "69"         "C100"       "166"        "maintained"
## [6] "41"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"D25","D100")
## [1] "D25"        "45"         "D100"       "93"         "maintained"
## [6] "20"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"F25","F100")
## [1] "F25"        "56"         "F100"       "87"         "maintained"
## [6] "33"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"G25","G100")
## [1] "G25"        "93"         "G100"       "121"        "maintained"
## [6] "56"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"H25","H100")
## [1] "H25"        "85"         "H100"       "122"        "maintained"
## [6] "59"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"I25","I100")
## [1] "I25"        "67"         "I100"       "217"        "maintained"
## [6] "40"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"J25","J100")
## [1] "J25"        "71"         "J100"       "118"        "maintained"
## [6] "47"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"K25","K100")
## [1] "K25"        "80"         "K100"       "178"        "maintained"
## [6] "44"
epimuts_maintained(ttg_segmentation_epimutable_genes_25_100,"L25","L100")
## [1] "L25"        "94"         "L100"       "141"        "maintained"
## [6] "43"
hist(apply(ttg_segmentation_epimutable_genes_25_100,FUN = mean,MARGIN=1))

hist(apply(ttg_segmentation_epimutable_genes_25_100,FUN = median,MARGIN=1))

epimuts_maintained_median<-function(df,line1,line2){
  median<-apply(ttg_segmentation_epimutable_genes_25_100,FUN = median,MARGIN=1)
  epimuts_25<-abs(df[,line1]-median)
  epimuts_100<-abs(df[,line2]-median)
  print(c(line1,length(which(epimuts_25==1)),line2,length(which(epimuts_100==1)),length(which(epimuts_25==1 & epimuts_100==1))))
}

epimut_persistence<-rbind(epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"A25","A100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"B25","B100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"C25","C100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"D25","D100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"F25","F100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"G25","G100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"H25","H100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"I25","I100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"J25","J100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"K25","K100"),
epimuts_maintained_median(ttg_segmentation_epimutable_genes_25_100,"L25","L100"))
## [1] "A25"  "65"   "A100" "67"   "14"  
## [1] "B25"  "85"   "B100" "81"   "25"  
## [1] "C25"  "66"   "C100" "121"  "17"  
## [1] "D25"  "76"   "D100" "46"   "12"  
## [1] "F25"  "69"   "F100" "52"   "22"  
## [1] "G25"  "74"   "G100" "72"   "22"  
## [1] "H25"  "56"   "H100" "71"   "19"  
## [1] "I25"  "50"   "I100" "176"  "11"  
## [1] "J25"  "58"   "J100" "67"   "15"  
## [1] "K25"  "73"   "K100" "147"  "25"  
## [1] "L25"  "79"   "L100" "100"  "15"
colnames(epimut_persistence)<-c("line_25","epimutations_25","line_100","epimutations_100","shared epimutations")

write.table(epimut_persistence,file="persistence_data.txt")

library(reprex)

epimut_persistence<-data.frame(epimut_persistence[,c(2,4,5)])
epimut_persistence$epimutations_25<-as.numeric(as.character(epimut_persistence$epimutations_25))
epimut_persistence$epimutations_100<-as.numeric(as.character(epimut_persistence$epimutations_100))
epimut_persistence$shared.epimutations<-as.numeric(as.character(epimut_persistence$shared.epimutations))

epimut_persistence$epimutations_25_unique<-epimut_persistence$epimutations_25-epimut_persistence$shared.epimutations
epimut_persistence$epimutations_100_unique<-epimut_persistence$epimutations_100-epimut_persistence$shared.epimutations



mat<-t(as.matrix(epimut_persistence[,c(3,4,5)]))[c(2,3,1),]
library(reshape2)
mat<-melt(mat)

mat$Var1<-factor(mat$Var1,levels(mat$Var1)[c(3,2,1)])

ggplot(mat)+
  geom_col(aes(x=Var2,y=value,group=Var1,fill=Var1))+
  theme_classic()+
  ylab("number of epimutations")+
  xlab("lineage")+
  scale_fill_manual(values = c("orange","#43a2ca","#a8ddb5"))

ggsave("epimutation_persistence_summary.pdf")
## Saving 7 x 5 in image
mat2<-melt(epimut_persistence[,c(1,2)])
## No id variables; using all as measure variables
ggplot(mat2,aes(x=factor(variable),y=value,fill=factor(variable),color=factor(variable)))+geom_dotplot(binaxis = "y", stackdir = "center")+ stat_summary(fun.y=median, geom="point", shape=1,size=2,col="black")+scale_fill_manual(values = c("#a8ddb5","#43a2ca"),aesthetics = c("fill","color"))+theme_classic()
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("epimutation_totals.pdf")
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
median(epimut_persistence$epimutations_25)
## [1] 69
median(epimut_persistence$epimutations_100)
## [1] 72
wilcox.test(epimut_persistence$epimutations_25,epimut_persistence$epimutations_100)
## Warning in wilcox.test.default(epimut_persistence$epimutations_25,
## epimut_persistence$epimutations_100): cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  epimut_persistence$epimutations_25 and epimut_persistence$epimutations_100
## W = 44, p-value = 0.2933
## alternative hypothesis: true location shift is not equal to 0

Estimates of epimutation totals removing outlier samples

Pairwise

When we do the same analysis removing the 100th generation sample outliers, the amount of observed differences between 25th and 100th generation samples from the pairwise analysis is not significantly different.

Now it seems that the 75, 100 and 125 distances are higher than the rest, however this is an artefact since these three are between-batch comparisons (comparing the 25th and 100th generation samples that were grown and sequenced on separate experiments). So it makes more sense here to compare within batches (distance 50 and distance 200). In this case there is no difference in the number of changes observed between lines.

CIKL<-c("C100","I100","K100","L100")
ttg_MAplot_data_p1Eminus4_noCIKL<-ttg_MAplot_data_p1Eminus4[which(!(ttg_MAplot_data_p1Eminus4$line1 %in% CIKL) &
                                                                  !(ttg_MAplot_data_p1Eminus4$line2 %in% CIKL)),]
epimutable_genes_noCIKL<-unique(ttg_MAplot_data_p1Eminus4_noCIKL$ID)
length(epimutable_genes_noCIKL)
## [1] 307
ttg_DEseqnorm_epimutable_genes_noCIKL<-ttg_DEseqnorm[which(rownames(ttg_DEseqnorm) %in% epimutable_genes_noCIKL),-c(39,44,46,47)]

kmeans_row<-function(vector){  
  kmc<-kmeans(x = as.numeric(vector),centers=2)
  high<-kmc$cluster[which.is.max(vector)]
  if (high==2){return(kmc$cluster)
  }else{return(abs(kmc$cluster-3))}
}

library(nnet)

ttg_segmentation_epimutable_genes_noCIKL<-matrix(0,nrow=nrow(ttg_DEseqnorm_epimutable_genes_noCIKL),ncol = 43)
colnames(ttg_segmentation_epimutable_genes_noCIKL)<-colnames(ttg_DEseqnorm_epimutable_genes_noCIKL)
rownames(ttg_segmentation_epimutable_genes_noCIKL)<-rownames(ttg_DEseqnorm_epimutable_genes_noCIKL)

for (gene in rownames(ttg_DEseqnorm_epimutable_genes_noCIKL)){
      ttg_segmentation_epimutable_genes_noCIKL[gene,]<-kmeans_row(ttg_DEseqnorm_epimutable_genes_noCIKL[gene,])
}

heatmap.2(as.matrix(ttg_segmentation_epimutable_genes_noCIKL[,25:43]),col = colorRampPalette(c("#67a9cf","#de2d26")),Colv=FALSE,trace="none",dendrogram = "none")

heatmap.2(cor(as.matrix(ttg_segmentation_epimutable_genes_noCIKL[,25:43])),Colv=FALSE,trace="none",dendrogram = "none")

diffs<-c()
line1<-c()
line2<-c()
distances<-c()

for (l1 in colnames(ttg_segmentation_epimutable_genes_noCIKL[,25:43])){
  for (l2 in colnames(ttg_segmentation_epimutable_genes_noCIKL[,25:43])){
    
  diffs<-c(diffs,sum(abs(ttg_segmentation_epimutable_genes_noCIKL[,l1]-ttg_segmentation_epimutable_genes_noCIKL[,l2])))
  line1<-c(line1,l1); line2<-c(line2,l2)
  
  if (l1=="PMA"){l1<-"P0"}
  if (l2=="PMA"){l2<-"P0"}
        
  lineage_1<-substr(l1,start = 1,stop = 1)
  gen_1<-as.numeric(substr(l1,start=2,stop=nchar(l1)))
  lineage_2<-substr(l2,start = 1,stop = 1)
  gen_2<-as.numeric(substr(l2,start=2,stop=nchar(l2)))
        
  if (lineage_1!=lineage_2){
        distance=abs(gen_1+gen_2)
        }
  else if (lineage_1==lineage_2){
        distance=abs(gen_1-gen_2)
        }
  distances<-c(distances,distance)
  
  if (l1=="P0"){l1<-"PMA"}
  if (l2=="P0"){l2<-"PMA"}

}}


numchanges_df_noCIKL<-data.frame(num_changes=diffs,line1=line1,line2=line2,distance=distances)
ggplot(numchanges_df_noCIKL)+geom_boxplot(aes(y=num_changes,x=factor(distance),fill=factor(distance)))+scale_fill_brewer(aesthetics =c("fill"),palette = "GnBu")+theme_classic()

ggsave("number_of_changes_boxplot_gens25_100_withoutCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
numchanges_df_noCIKL_50_200<-numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==50 | numchanges_df_noCIKL$distance==200),]
ggplot(numchanges_df_noCIKL_50_200)+geom_boxplot(aes(y=num_changes,x=factor(distance),fill=factor(distance)),width=0.5)+scale_fill_manual(values=c("#66c2a5", "#8da0cb"))+theme_classic()

ggsave("number_of_changes_boxplot_withinbatch_withoutCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
ggplot(numchanges_df_noCIKL_50_200,aes(x=factor(distance),y=num_changes,fill=factor(distance),color=factor(distance)))+geom_dotplot(binaxis = "y", stackdir = "center")+ stat_summary(fun.y=median, geom="point", shape=19,size=5,col="black")+scale_fill_manual(values = c("#a8ddb5","#43a2ca"),aesthetics = c("fill","color"))+
  theme_classic()
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("number_of_changes_dotplot_withinbatch_withoutCIKL.pdf",dpi="retina")
## Saving 7 x 5 in image
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
#testing differences in the number of changes
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==50),"num_changes"],
            numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==200),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 50),  and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 200),     "num_changes"] and     "num_changes"]
## W = 2296, p-value = 0.9556
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==25),"num_changes"],
            numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==50),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 25),  and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 50),     "num_changes"] and     "num_changes"]
## W = 800, p-value = 0.01235
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==50),"num_changes"],
            numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==75),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 50),  and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 75),     "num_changes"] and     "num_changes"]
## W = 412, p-value = 0.004741
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==75),"num_changes"],
            numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==100),"num_changes"])
## Warning in
## wilcox.test.default(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance
## == : cannot compute exact p-value with ties
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 75),  and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 100),     "num_changes"] and     "num_changes"]
## W = 66, p-value = 0.1466
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==100),"num_changes"],
            numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==125),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 100),  and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 125),     "num_changes"] and     "num_changes"]
## W = 1048, p-value = 0.6713
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==125),"num_changes"],
            numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance==200),"num_changes"])
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 125),  and numchanges_df_noCIKL[which(numchanges_df_noCIKL$distance == 200),     "num_changes"] and     "num_changes"]
## W = 5128, p-value = 2.731e-13
## alternative hypothesis: true location shift is not equal to 0

Tests for long term inheritance

plot_sRNA_levels<-function(gene,pval,padj,savestring){
  ordered_data_points<-as.numeric(ttg_DEseqnorm_epimutable_genes[gene,c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
  data<-data.frame(ttg_counts=ordered_data_points,line=colnames(ttg_DEseqnorm_epimutable_genes)[c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
  data$sample<-factor(data$line,levels=colnames(ttg_DEseqnorm_epimutable_genes)[c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
  data$generation<-c("PeMA",rep(c("25","100"),11)); data$generation<-factor(data$generation,levels=c("PeMA","25","100"))
  data$lineage<-c("PeMA","A","A","B","B","C","C","D","D","F","F","G","G","H","H","I","I","J","J","K","K","L","L")
  data$lineage<-factor(data$lineage,levels=c("PeMA","A","B","C","D","F","G","H","I","J","K","L"))
  print(ggplot(data)+geom_point(aes(y=ttg_counts,x=lineage,color=generation),size=3)+theme_classic()+ylab("normalised 22G counts")+xlab("lineage")+ggtitle(paste(paste(paste(paste(gene,"p",sep=","),pval,sep="="),"padj",sep=","),padj,sep="="))
  + scale_color_manual(values=c("#fc8d62", "#66c2a5", "#8da0cb")))
  if (savestring=="no"){
    return()}else{
  ggsave(filename = paste(paste(paste("../04_Figure2/",savestring,sep=""),gene,sep="_"),"pdf",sep="."),dpi="retina")}
}

Having defined the high and low 22G level states, we can test on a gene-by-gene basis whether the states in the 25th and 100th generation samples are more similar than you would expect. To test this, I randomise the order of the 1-2 assignations 100000 times, and calculate how many of these would result in an equal or greater number of matches between 25 and 100 generation states than observed.

Also, to incorporate the quantitative range of the data in the analysis, I did a similar analysis calculating the within-lineage variation vs overall variation in random matches of 25 and 100 generation samples.

Both analyses converge on the conclusion that there is no evidence for long-term inheritance in our dataset.

With the outlier samples

#filter out genes with all 1s or 2s in the 25th/100th gen samples

test_epigen_inheritance<-function(row){
  num_matches<-11-sum(abs(row[25:35]-row[37:47]))
  rand_matches_distribution<-rep(0,100000)
  for (i in seq(100000)){
    rand_25<-sample(row[25:35]); rand_100<-sample(row[37:47])
    rand_matches<-11-sum(abs(rand_25-rand_100))
    rand_matches_distribution[i]<-rand_matches
  }
  return(c(num_matches,length(which(rand_matches_distribution>=num_matches))/length(rand_matches_distribution)))
}

test_df<-data.frame(t(apply(ttg_segmentation_epimutable_genes,MARGIN=1,FUN=test_epigen_inheritance)))
colnames(test_df)<-c("num_matches","pval")
test_df$padj<-p.adjust(test_df$pval,method="fdr")
ggplot(test_df)+geom_histogram(aes(x=pval),breaks=seq(0,1,1/30))

ggsave("../04_Figure2/epigenetic_inheritance_test_kmeans_withCILK.pdf")
## Saving 7 x 5 in image
ggplot(test_df)+geom_histogram(aes(x=padj),breaks=seq(0,1,1/30))

ggsave("../04_Figure2/epigenetic_inheritance_test_kmeans_withCILK_padj.pdf")
## Saving 7 x 5 in image
ordered_test_df<-test_df[order(test_df$pval,decreasing = FALSE),]
ordered_test_df[1:10,]
##            num_matches    pval padj
## Y116F11A.1          10 0.02399    1
## C39B5.3              9 0.04462    1
## F58E10.7            10 0.05573    1
## Y43F8B.22            9 0.06221    1
## C27D8.2             11 0.09028    1
## Y76B12C.4a          11 0.09065    1
## F52C9.1.2           11 0.09075    1
## Y17D7C.3             8 0.10706    1
## Y17D7C.4             9 0.10903    1
## W01A11.3c            9 0.11103    1
write.table(ordered_test_df,"epistates_test_df_withCILK.txt")


for (gene in rownames(ordered_test_df[1:10,])){
  plot_sRNA_levels(gene,ordered_test_df[gene,"pval"],ordered_test_df[gene,"padj"],
                   savestring = "epistates_test_CILK/epistates_test_top10")
}
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

test_epigen_inheritance_variance<-function(row){
  num_matches<-sum((row[25:35]-row[37:47])**2)/11
  rand_matches_distribution<-rep(0,100000)
  for (i in seq(100000)){
    rand_25<-sample(row[25:35]); rand_100<-sample(row[37:47])
    rand_variance<-sum((rand_25-rand_100)**2)/11
    rand_matches_distribution[i]<-rand_variance
  }
  return(c(num_matches,length(which(rand_matches_distribution<=num_matches))/length(rand_matches_distribution)))
}

test_variance_df<-data.frame(t(apply(ttg_DEseqnorm_epimutable_genes,MARGIN=1,FUN=test_epigen_inheritance_variance)))

colnames(test_variance_df)<-c("num_matches","pval")
test_variance_df$padj<-p.adjust(test_variance_df$pval,method="fdr")
ggplot(test_variance_df)+geom_histogram(aes(x=pval),breaks=seq(0,1,1/30))

ggsave("../04_Figure2/epigenetic_inheritance_test_variance_withCILK.pdf")
## Saving 7 x 5 in image
ggplot(test_variance_df)+geom_histogram(aes(x=padj),breaks=seq(0,1,1/30))

ggsave("../04_Figure2/epigenetic_inheritance_test_variance_withCILK_padj.pdf")
## Saving 7 x 5 in image
ordered_test_variance_df<-test_variance_df[order(test_variance_df$pval,decreasing = FALSE),]
ordered_test_variance_df[1:10,]
##           num_matches    pval      padj
## F58G6.3  5.989801e+01 0.00013 0.0574600
## Y17D7C.3 4.929410e+06 0.00219 0.4839900
## F54F7.7  3.029012e+05 0.00337 0.4965133
## C27D8.2  4.556021e+01 0.00597 0.5134567
## K10D2.8  1.221207e+05 0.00611 0.5134567
## ZK228.1  3.681626e+04 0.00726 0.5134567
## R02F2.2  2.545700e+03 0.01259 0.5134567
## F35H10.5 4.067716e+02 0.01284 0.5134567
## F58E10.7 2.567370e+04 0.01332 0.5134567
## C48D1.6  1.167289e+03 0.01356 0.5134567
write.table(ordered_test_variance_df,"variance_test_df_withCILK.txt")


for (gene in rownames(ordered_test_variance_df[1:10,])){
  plot_sRNA_levels(gene,ordered_test_variance_df[gene,"pval"],ordered_test_variance_df[gene,"padj"],
                   savestring = "variance_test_CILK/variance_test_top10")
}
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

Same analysis after removing the outlier samples (CILK).

#filter out genes with all 1s or 2s in the 25th/100th gen samples

colnames(ttg_segmentation_epimutable_genes)[c(25,26,28:31,33,37,38,40:43,45)]
##  [1] "A25"  "B25"  "D25"  "F25"  "G25"  "H25"  "J25"  "A100" "B100" "D100"
## [11] "F100" "G100" "H100" "J100"
test_epigen_inheritance<-function(row){
  num_matches<-11-sum(abs(row[c(25,26,28:31,33)]-row[c(37,38,40:43,45)]))
  rand_matches_distribution<-rep(0,100000)
  for (i in seq(100000)){
    rand_25<-sample(row[c(25,26,28:31,33)]); rand_100<-sample(row[c(37,38,40:43,45)])
    rand_matches<-11-sum(abs(rand_25-rand_100))
    rand_matches_distribution[i]<-rand_matches
  }
  return(c(num_matches,length(which(rand_matches_distribution>=num_matches))/length(rand_matches_distribution)))
}

test_df_noCILK<-data.frame(t(apply(ttg_segmentation_epimutable_genes,MARGIN=1,FUN=test_epigen_inheritance)))
colnames(test_df_noCILK)<-c("num_matches","pval")
test_df_noCILK$padj<-p.adjust(test_df_noCILK$pval,method="fdr")
ggplot(test_df_noCILK)+geom_histogram(aes(x=pval),breaks=seq(0,1,1/30))

ggsave("../04_Figure2/epigenetic_inheritance_test_kmeans_withoutCILK.pdf")
## Saving 7 x 5 in image
ggplot(test_df_noCILK)+geom_histogram(aes(x=padj),breaks=seq(0,1,1/30))

ggsave("../04_Figure2/epigenetic_inheritance_test_kmeans_withoutCILK_padj.pdf")
## Saving 7 x 5 in image
ordered_test_df_noCILK<-test_df_noCILK[order(test_df_noCILK$pval,decreasing = FALSE),]
ordered_test_df_noCILK[1:10,]
##            num_matches    pval padj
## Y116F11A.1          11 0.02806    1
## K11D2.2.1           10 0.11629    1
## F58E10.7            11 0.14137    1
## C27D8.2             11 0.14328    1
## K10D2.8             11 0.14386    1
## F52C9.1.2           11 0.14398    1
## F56D6.15            10 0.14488    1
## T23F6.3a            10 0.28199    1
## Y17D7C.3             9 0.28335    1
## Y49F6A.10            9 0.28350    1
for (gene in rownames(ordered_test_df_noCILK[1:10,])){
  plot_sRNA_levels(gene,ordered_test_df_noCILK[gene,"pval"],ordered_test_df_noCILK[gene,"padj"],
                   savestring = "epistates_test_noCILK/epistates_test_noCILK_top10")
}
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

write.table(ordered_test_df_noCILK,"epistates_test_df_noCILK.txt")


test_epigen_inheritance_variance<-function(row){
  num_matches<-sum((row[c(25,26,28:31,33)]-row[c(37,38,40:43,45)])**2)/11
  rand_matches_distribution<-rep(0,100000)
  for (i in seq(100000)){
    rand_25<-sample(row[c(25,26,28:31,33)]); rand_100<-sample(row[c(37,38,40:43,45)])
    rand_variance<-sum((rand_25-rand_100)**2)/11
    rand_matches_distribution[i]<-rand_variance
  }
  return(c(num_matches,length(which(rand_matches_distribution<=num_matches))/length(rand_matches_distribution)))
}

test_variance_df_noCILK<-data.frame(t(apply(ttg_DEseqnorm_epimutable_genes,MARGIN=1,FUN=test_epigen_inheritance_variance)))

colnames(test_variance_df_noCILK)<-c("num_matches","pval")
test_variance_df_noCILK$padj<-p.adjust(test_variance_df_noCILK$pval,method="fdr")
ggplot(test_variance_df_noCILK)+geom_histogram(aes(x=pval),breaks=seq(0,1,1/30))

ggsave("../04_Figure2/epigenetic_inheritance_test_variance_withoutCILK.pdf")
## Saving 7 x 5 in image
ggplot(test_variance_df_noCILK)+geom_histogram(aes(x=padj),breaks=seq(0,1,1/30))

ggsave("../04_Figure2/epigenetic_inheritance_test_variance_withoutCILK_padj.pdf")
## Saving 7 x 5 in image
ordered_test_variance_df_noCILK<-test_variance_df_noCILK[order(test_variance_df_noCILK$pval,decreasing = FALSE),]
ordered_test_variance_df_noCILK[1:10,]
##           num_matches    pval      padj
## C27D8.2  3.965678e+01 0.00260 0.4196544
## R02F2.2  8.524297e+00 0.00292 0.4196544
## F56D6.15 8.313818e+02 0.00392 0.4196544
## K09E3.6  1.957955e+03 0.00601 0.4196544
## Y17D7C.3 4.400638e+06 0.00633 0.4196544
## Y37H2B.1 1.091051e+03 0.00685 0.4196544
## F07B7.12 5.266671e+00 0.00691 0.4196544
## F54F7.7  1.751573e+04 0.00835 0.4196544
## F21D9.7  5.991030e+04 0.01108 0.4196544
## C46G7.3  3.133321e+04 0.01341 0.4196544
write.table(ordered_test_variance_df_noCILK,"variance_test_df_noCILK.txt")


for (gene in rownames(ordered_test_variance_df_noCILK[1:10,])){
  plot_sRNA_levels(gene,ordered_test_variance_df_noCILK[gene,"pval"],ordered_test_variance_df_noCILK[gene,"padj"],
                   savestring = "variance_test_noCILK/variance_test_noCILK_top10")
}
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

Plot sRNA levels for supplementary figure showing some of the top ranked genes

plot_sRNA_levels<-function(gene,pval,padj,N,savestring){
  ordered_data_points<-as.numeric(ttg_DEseqnorm_epimutable_genes[gene,c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
  data<-data.frame(ttg_counts=ordered_data_points,line=colnames(ttg_DEseqnorm_epimutable_genes)[c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
  data$sample<-factor(data$line,levels=colnames(ttg_DEseqnorm_epimutable_genes)[c(36,25,37,26,38,27,39,28,40,29,41,30,42,31,43,32,44,33,45,34,46,35,47)])
  data$generation<-c("PeMA",rep(c("25","100"),11)); data$generation<-factor(data$generation,levels=c("PeMA","25","100"))
  data$lineage<-c("PeMA","A","A","B","B","C","C","D","D","F","F","G","G","H","H","I","I","J","J","K","K","L","L")
  data$lineage<-factor(data$lineage,levels=c("PeMA","A","B","C","D","F","G","H","I","J","K","L"))
  print(ggplot(data)+geom_point(aes(y=ttg_counts,x=lineage,color=generation),size=3)+theme_classic()+ylab("normalised 22G counts")+xlab("lineage")+ggtitle(paste(paste(paste(paste(paste(paste(gene,"p variance test",sep=","),pval,sep="="),"p epistates test",sep=","),padj,sep="="),"number of matches",sep=""),N,sep="="))
  + scale_color_manual(values=c("#fc8d62", "#66c2a5", "#8da0cb")))
  if (savestring=="no"){
    return()}else{
  ggsave(filename = paste(paste(paste("../04_Figure2/",savestring,sep=""),gene,sep="_"),"pdf",sep="."),dpi="retina")}
}



plot_sRNA_levels("F52C9.1.2",0.07446,0.08984,11,"plots_for_suppfig2/")
## Saving 7 x 5 in image

plot_sRNA_levels("Y116F11A.1",0.02376,0.02414,10,"plots_for_suppfig2/")
## Saving 7 x 5 in image

plot_sRNA_levels("K10D2.8",0.00589,0.18263,10,"plots_for_suppfig2/")
## Saving 7 x 5 in image

plot_sRNA_levels("C27D8.2",0.00624,0.09163,11,"plots_for_suppfig2/")
## Saving 7 x 5 in image

ttg_MAplot_data_p1Eminus4[which(ttg_MAplot_data_p1Eminus4$line2=="PMA" & ttg_MAplot_data_p1Eminus4$fc<0),]
##               ID      mean        fc         padj line1 line2
## 319      T03F6.1 12.825567 -1.659804 1.882552e-08   A25   PMA
## 321      T10D4.5  5.911864 -4.089286 1.050306e-15   A25   PMA
## 323   Y65B4BL.7a  4.866717 -3.072315 7.356761e-07   A25   PMA
## 1202   C07G3.9.2 10.130461 -1.805331 2.619946e-11   B25   PMA
## 1207     C55C3.7  7.043928 -5.346549 4.277422e-77   B25   PMA
## 1212 F23B12.4c.2  7.958701 -1.367157 6.647172e-05   B25   PMA
## 1216     F42G8.5  7.342828 -2.372058 2.932981e-15   B25   PMA
## 1218     K10G9.2  9.279968 -1.425779 4.216102e-06   B25   PMA
## 1220   M01E11.4c  8.602249 -1.683475 2.646414e-08   B25   PMA
## 1229  Y65B4BL.7a  5.205076 -3.446094 2.454591e-16   B25   PMA
## 1232     ZC190.7  4.369778 -2.845912 3.166586e-07   B25   PMA
## 1233     ZC190.8  6.214683 -3.120048 2.422865e-19   B25   PMA
## 1737    C02F5.10  8.041969 -2.554854 1.720883e-07   C25   PMA
## 1742    F40D4.17  8.285054 -2.421074 6.462507e-07   C25   PMA
## 1744     T03F6.1 12.929700 -1.795386 1.576476e-14   C25   PMA
## 1747     T10D4.5  5.228074 -3.353165 8.100023e-07   C25   PMA
## 2562     B0261.8 11.014256 -1.298769 2.999386e-07   D25   PMA
## 2565     C26C6.3  9.860089 -1.473888 3.266113e-08   D25   PMA
## 2567     C46A5.8  7.306049 -1.458529 2.571334e-07   D25   PMA
## 2576   F52C9.1.2  8.203823 -1.711948 9.849541e-11   D25   PMA
## 2577    F55G1.10  7.965168 -3.169797 3.174355e-40   D25   PMA
## 2579    H14A12.5  8.684057 -3.963749 2.709580e-62   D25   PMA
## 2583     T23D8.6  6.904268 -1.937391 1.658645e-13   D25   PMA
## 2586     W02B8.3  6.935998 -2.319795 7.087033e-20   D25   PMA
## 2590  Y65B4BL.7a  5.096796 -3.327460 4.787028e-18   D25   PMA
## 3107     B0511.3  7.308011 -3.491121 2.154514e-20   F25   PMA
## 3108     B0511.4  5.608469 -5.190702 4.650496e-31   F25   PMA
## 3110     C07G1.7  3.649766 -4.591119 1.593172e-09   F25   PMA
## 3116     T03F6.1 12.975479 -1.854088 1.275609e-11   F25   PMA
## 3121     T20F5.5  7.501455 -2.814376 3.027300e-13   F25   PMA
## 3816     C27D8.1  5.343421 -6.325543 1.389595e-26   G25   PMA
## 3818     C36B7.6  7.079757 -2.675156 3.177847e-06   G25   PMA
## 3823     F46F5.3  5.712026 -2.926872 4.701529e-05   G25   PMA
## 3827    F55G1.10  7.445012 -2.578312 3.177847e-06   G25   PMA
## 3829    H14A12.5  7.224404 -2.332426 6.941691e-05   G25   PMA
## 3831   M01E11.4c  9.009509 -2.197179 5.880084e-05   G25   PMA
## 3836     T28F2.1  4.427532 -4.490946 8.460361e-09   G25   PMA
## 3843   Y60A3A.24  5.660550 -2.905159 5.880084e-05   G25   PMA
## 4229     C07G1.7  3.268587 -4.191724 2.045709e-05   H25   PMA
## 4232    C54E10.1  4.764621 -4.296434 2.059473e-10   H25   PMA
## 4236     T03F6.1 12.736277 -1.541063 1.468405e-06   H25   PMA
## 4238     T10D4.5  5.333945 -3.468990 4.531941e-08   H25   PMA
## 4904     C07G1.7  2.990848 -3.897126 1.077536e-05   I25   PMA
## 4905    C16C8.20  7.036072 -1.857768 6.197790e-06   I25   PMA
## 4909    C54E10.1  3.703600 -3.153358 4.882505e-05   I25   PMA
## 4912 F23B12.4c.2  8.490022 -2.061240 4.818831e-09   I25   PMA
## 4914     T03F6.1 12.829266 -1.664671 4.818831e-09   I25   PMA
## 4922    Y37E3.5c  7.177292 -2.135552 2.195466e-08   I25   PMA
## 5376     C28A5.6  7.665463 -1.922619 1.049815e-08   J25   PMA
## 5379    C56G7.3c  8.620526 -2.120803 1.416419e-11   J25   PMA
## 5382     F22F1.2  6.934125 -1.888810 2.004784e-07   J25   PMA
## 5383 F23B12.4c.2  8.701787 -2.319442 5.416374e-14   J25   PMA
## 5384    F31C3.11  5.944217 -1.914252 3.902400e-05   J25   PMA
## 5386     F39D8.7 10.655019 -1.330376 2.163909e-05   J25   PMA
## 5387    F49E11.2  7.689390 -1.598116 9.696719e-06   J25   PMA
## 5388     T03F6.1 12.668111 -1.448709 1.049815e-08   J25   PMA
## 5399  Y65B4BL.7a  4.900947 -3.110561 2.940171e-09   J25   PMA
## 5798     C07G1.7  3.829594 -4.777943 4.546856e-11   K25   PMA
## 5800    C16C8.20  7.194230 -2.056674 6.488058e-06   K25   PMA
## 5805     C48B6.9  6.023986 -3.986632 6.933766e-20   K25   PMA
## 5806    C54E10.1  3.900281 -3.370558 3.036123e-05   K25   PMA
## 5807    C56G7.3c  8.741887 -2.268681 8.147561e-08   K25   PMA
## 5810   F45H10.1b  6.974302 -1.956925 2.954829e-05   K25   PMA
## 5812    F55G1.10  8.254398 -3.487860 1.871137e-19   K25   PMA
## 5814    K07H8.6c  4.948040 -2.756077 1.122571e-05   K25   PMA
## 5817   M01E11.4c  8.992328 -2.176224 2.932948e-07   K25   PMA
## 5820      R186.1  7.590265 -2.031059 6.488058e-06   K25   PMA
## 5821   T05D4.1.2  5.947429 -2.915467 7.181512e-10   K25   PMA
## 5825    W04B5.3c  8.420392 -1.818887 9.585423e-05   K25   PMA
## 5835    ZK1098.7  8.047849 -2.034949 5.377481e-06   K25   PMA
## 6199     B0511.5  6.771525 -2.240504 9.208740e-06   L25   PMA
## 6200     C07G1.7  3.289531 -4.213805 2.515365e-06   L25   PMA
## 6202     C36B7.6  7.023999 -2.610469 5.983600e-08   L25   PMA
## 6204    C49G7.11  5.227873 -3.095880 9.254738e-07   L25   PMA
## 6209     F47A4.5  7.106980 -2.222723 3.406153e-06   L25   PMA
## 6210     F48E3.9  6.576009 -2.127470 8.215926e-05   L25   PMA
## 6214    F55G1.10  7.345425 -2.461358 9.449843e-08   L25   PMA
## 6216    F58E10.7  7.358035 -2.026011 2.271277e-05   L25   PMA
## 6222     R10F2.1 11.391570 -1.771338 2.515365e-06   L25   PMA
## 6223     T03F6.1 12.915116 -1.776573 1.028825e-06   L25   PMA
## 6231   Y60A3A.24  5.643993 -2.886377 9.254738e-07   L25   PMA